/* code used in Tables 3 and 4.
This file contains routines which do the following:

make_deviations:  makes deviations from state and year fixed effects, or from year-only fixed effects.
table3_np_boot, table4_np_boot (np_boot):  used for the non-parametric bootstrap; after cluster-sampling, they take the deviations from fixed effects and run the regression.
quickreg_1:  a Mata function which runs OLS and computes cluster-robust standard errors.  It is intended to run more quickly than
Stata's built-in commands (less data checking, etc.).  The intent is to speed up the bootstraps.
*/


#delimit ;


cap prog drop table3_np_boot ;
prog def table3_np_boot , rclass ;
syntax  , lhs(string) rhs(string) keyrhs(string) clusterid(string) [ yearonly ] ;

/*
   Demeans the data and runs the cluster-robust regression on the demeaned data.

   Options:
     lhs(string)        dependent variable
     rhs(string)        regressor(s); quickreg_1 reports results for the first one
     keyrhs(string)     required by the syntax but not used inside this program
     clusterid(string)  variable identifying the clusters
     yearonly           passed through to make_deviations (year-only demeaning);
                        without it make_deviations removes state and year effects

   Returns:
     r(mybeta)  coefficient stored by quickreg_1 in scalar mybeta
     r(myse)    cluster-robust standard error stored by quickreg_1 in scalar myse
*/

/* one "_deviation" name per regressor, so a multi-variable rhs() is named correctly */
local rhsdev ;
foreach v of local rhs { ;
	local rhsdev `rhsdev' `v'_deviation ;
} ;

/* drop leftovers one at a time: a single "drop a b" removes nothing if either is absent */
foreach v in `lhs'_deviation `rhsdev' { ;
	cap drop `v' ;
} ;
make_deviations `lhs' `rhs' , `yearonly' ;

mata: quickreg_1("`lhs'_deviation","`rhsdev'","`clusterid'") ;

/* scalar() forces the scalar interpretation even if a variable named (or abbreviating to)
   mybeta / myse exists; returning the scalars directly avoids a round trip through a macro */
return scalar mybeta = scalar(mybeta) ;
return scalar myse = scalar(myse) ;

end ;

cap prog drop table4_np_boot ;
prog def table4_np_boot , rclass ;
syntax  , lhs(string) rhs(string) keyrhs(string) clusterid(string) ;

/*
   Demeans the data (make_deviations is called without options, i.e. state and
   year effects) and runs the cluster-robust regression on the demeaned data.

   Options:
     lhs(string)        dependent variable
     rhs(string)        regressor(s); quickreg_1 reports results for the first one
     keyrhs(string)     required by the syntax but not used inside this program
     clusterid(string)  variable identifying the clusters

   Returns:
     r(mybeta)  coefficient stored by quickreg_1 in scalar mybeta
     r(myse)    cluster-robust standard error stored by quickreg_1 in scalar myse
*/

/* one "_deviation" name per regressor, so a multi-variable rhs() is named correctly */
local rhsdev ;
foreach v of local rhs { ;
	local rhsdev `rhsdev' `v'_deviation ;
} ;

/* drop leftovers one at a time: a single "drop a b" removes nothing if either is absent */
foreach v in `lhs'_deviation `rhsdev' { ;
	cap drop `v' ;
} ;
make_deviations `lhs' `rhs'  ;

mata: quickreg_1("`lhs'_deviation","`rhsdev'","`clusterid'") ;

/* scalar() forces the scalar interpretation even if a variable named (or abbreviating to)
   mybeta / myse exists; returning the scalars directly avoids a round trip through a macro */
return scalar mybeta = scalar(mybeta) ;
return scalar myse = scalar(myse) ;

end ;


cap prog drop make_deviations ;
prog def make_deviations ;
syntax varlist [, yearonly] ;

/*
   For every variable V in varlist, (re)creates V_deviation.

   Without options:  V_deviation = (V - V_avg_s) minus the by-year mean of (V - V_avg_s).
                     Requires V_avg_s to exist already, and a variable named year.
   With yearonly:    V_deviation = V - V_avg_y.
                     Requires V_avg_y to exist already.

   Any existing V_deviation is replaced in both modes.
*/

if "`yearonly'" == "" { ;

	foreach var of local varlist { ;
		/* temporary names cannot collide with existing variables and do not
		   lengthen the variable name */
		tempvar ds yavg ;
		gen `ds' = `var' - `var'_avg_s ;
		egen `yavg' = mean(`ds') , by(year) ;
		cap drop `var'_deviation ;
		gen `var'_deviation = `ds' - `yavg' ;
		drop `ds' `yavg' ;
	} ;
} ;
else { ;

	foreach var of local varlist { ;
		/* same replace-if-present behaviour as the state+year branch */
		cap drop `var'_deviation ;
		gen `var'_deviation = `var' - `var'_avg_y ;
	} ;
} ;


end ;


/* remove any earlier definitions of the Mata functions defined below;
   "cap" suppresses the error raised when a function does not exist yet */
cap mata: mata drop quickreg_1() ;
cap mata: mata drop tester() ;

mata ;



// quickreg_1: OLS of lhs on rhs (no constant is added) with a cluster-robust
// sandwich variance estimate.
//
//   lhs        name of the dependent variable
//   rhs        space-separated name(s) of the regressor(s)
//   clusterid  name of the numeric cluster identifier
//
// All observations currently in memory are used.
// Results are left in Stata scalars:
//   mybeta  coefficient on the FIRST regressor in rhs
//   myse    cluster-robust standard error of that coefficient
void quickreg_1(string scalar lhs, string scalar rhs, string scalar clusterid) {

	real matrix    Y, X, cluid, sorted, newX, info, XpXinv, sandwich_middle, X_g, vhat
	real colvector newY, mybeta, resid, score_g
	real scalar    numobs, k, numclusters, i_g, dofadj

	st_view(Y, ., tokens(lhs))
	st_view(X, ., tokens(rhs))
	st_view(cluid, ., tokens(clusterid))

	numobs = rows(X)
	k = cols(X)

	// stack (cluster id, Y, X) and sort on the cluster id so that every
	// cluster occupies a contiguous run of rows
	sorted = sort((cluid, Y, X), 1)
	newY = sorted[., 2]
	newX = sorted[|1,3 \ .,.|]

	// one row per cluster: (first row, last row) within the sorted data
	info = panelsetup(sorted, 1)
	numclusters = rows(info)

	XpXinv = invsym(newX' * newX)
	mybeta = XpXinv * newX' * newY
	resid = newY - (newX * mybeta)

	// middle of the sandwich: sum over clusters of (X_g' e_g)(X_g' e_g)'
	sandwich_middle = J(k, k, 0)
	for (i_g = 1; i_g <= numclusters; i_g++) {
		X_g = panelsubmatrix(newX, i_g, info)
		score_g = X_g' * panelsubmatrix(resid, i_g, info)
		sandwich_middle = sandwich_middle + score_g * score_g'
	}

	// degrees-of-freedom correction, following Stata:
	// ((G) / (G-1))    *   ((N-1) / (N-k))
	dofadj = (numclusters/(numclusters - 1)) * ((numobs-1)/(numobs-k))
	vhat = dofadj * XpXinv * sandwich_middle * XpXinv

	// st_numscalar() needs a 1 x 1 value, so store the first coefficient only;
	// this matches the [1,1] element of vhat used for the standard error
	st_numscalar("mybeta", mybeta[1])
	st_numscalar("myse", sqrt(vhat[1,1]))

}
// tester: trivial check that Mata functions can be defined and run;
// it displays the value 1234.
void tester() {
	real scalar x

	x = 1234
	x	// a bare expression statement displays its value
}


// run the trivial check defined above (displays 1234 when this file is executed)
tester()

end 
